###### Estimating Political Trust in Europe
###### Stan Models
###### for *Immigration and Public Support for Political Systems in Europe*
###### Christopher Claassen 2022

library(MASS)
library(psych)
library(arm)
library(tidyr)
library(stringr)
library(loo) 
library(bayesplot)
library(cmdstanr)
library(viridis)

# options
options("cmdstanr_verbose" = TRUE)
stanc_options = list("O1")
options(mc.cores = parallel::detectCores())

# folders
WD = rstudioapi::getActiveDocumentContext()$path 
setwd(dirname(WD))
print( getwd() )

# read data 
trust = read.csv("pol_trust_marginals_250722_edit.csv")

# create wide dataset and test dimensionality of items
trust_wd_wvs = pivot_wider(trust[trust$Project == "WVS", c(1:4, 8)], names_from=Item, values_from=RespProp)
round(cor(trust_wd_wvs[, 4:7], use="pair"), 2)
alpha(cor(trust_wd_wvs[, 4:7], use="pair")) # 0.83

trust_wd_evs = pivot_wider(trust[trust$Project == "EVS", c(1:4, 8)], names_from=Item, values_from=RespProp)
round(cor(trust_wd_evs[, 4:7], use="pair"), 2)
alpha(cor(trust_wd_evs[, 4:7], use="pair")) # 0.86

trust_wd_ess = pivot_wider(trust[trust$Project == "ESS", c(1:4, 8)], names_from=Item, values_from=RespProp)
round(cor(trust_wd_ess[, 4:6], use="pair"), 2)
alpha(cor(trust_wd_ess[, 4:6], use="pair")) # 0.93

trust_wd_eurb = pivot_wider(trust[trust$Project == "EuroBar", c(1:4, 8)], names_from=Item, values_from=RespProp)
round(cor(trust_wd_eurb[, 4:8], use="pair"), 2)
alpha(cor(trust_wd_eurb[, 4:8], use="pair")) # 0.93

trust_wd_ceeb = pivot_wider(trust[trust$Project == "CEEB", c(1:4, 8)], names_from=Item, values_from=RespProp)
round(cor(trust_wd_ceeb[, 4:8], use="pair"), 2)
alpha(cor(trust_wd_ceeb[, 4:8], use="pair")) # 0.92

# create year by country indicators
year0 = 1980 # year before 1st estimates
yearl = 2020 # last year to estimate
trust = trust[trust$Year > year0,]
trust = unite(trust, YearCountry, c(Year, Country), sep = "_", remove = FALSE)

# create item by country indicators
trust = unite(trust, ItemCnt, c(Item, Country), sep = "_", remove = FALSE)

# create item by country by wave indicators
trust = unite(trust, ItemCntWv, c(ItemCnt, Wave), sep = "_", remove = FALSE)

# create year by project indicators
trust = unite(trust, YrProj, c(Year, Project), sep = "_", remove = FALSE)

# create year by project by country indicators
trust = unite(trust, YrProjCnt, c(YrProj, Country), sep = "_", remove = FALSE)

# create year by project by country by wave indicators
trust = unite(trust, YrProjCntWv, c(YrProjCnt, Wave), sep = "_", remove = FALSE)

# drop countries with less than 2 years of data?
cnt.obs.yrs = rowSums(table(trust$Country, trust$Year) > 0)
(drop.cnt = names(cnt.obs.yrs)[cnt.obs.yrs == 1])
trust1 = trust[!(trust$Country %in% drop.cnt), ]

# drop countries without Vdem data
trust2 = trust1[!(trust1$Country %in% c("Andorra","Belize","Palestine","Hong Kong",
                                        "Bahamas","Macau","Yugoslavia")), ]

# factorise
trust2$Country = as.factor(as.character(trust2$Country))
trust2$Item = as.factor(as.character(trust2$Item))
trust2$ItemCnt = as.factor(as.character(trust2$ItemCnt))
trust2$Project = as.factor(as.character(trust2$Project))
trust2$YrProj = as.factor(as.character(trust2$YrProj))
trust2$YrProjCntWv = as.factor(as.character(trust2$YrProjCntWv))
trust2$Year = trust2$Year-year0


## Prepare data for stan

# first year of estimates
first.yr = by(trust2$Year, trust2$Country, min, simplify=TRUE)
first_yr = as.numeric(first.yr)
cnt_start_yr = first_yr # year to start time-series regressions

# extract indicators and indices
n.items = length(unique(trust2$Item))
n.cntrys = length(unique(trust2$Country))
n.yrs = yearl-year0 
n.proj = length(unique(trust2$Project))
n.resp = dim(trust2)[1]
n.itm.cnt = length(unique(trust2$ItemCnt))
n.surv = length(unique(trust2$YrProjCntWv))
n.cntry.yrs = n.cntrys * n.yrs
cntrys = as.numeric(trust2$Country)
cnt.names = as.character(sort(unique(trust2$Country)))
items = as.numeric(factor(trust2$Item))
surv = as.numeric(factor(trust2$YrProjCnt))
yrs = trust2$Year 
projs = as.numeric(factor(trust2$Project))
itm.cnts = as.numeric(factor(trust2$ItemCnt))
mean.resp.log = logit(mean(trust2$RespProp))

# create item-country length indicator for items
# this tells us which item pertains to each item-country combination
item.ind.kp = rep(0, length(levels(trust2$ItemCnt)))
for(i in 1:length(levels(trust2$Item))) {
  item.ind.kp[grepl(levels(trust2$Item)[i], levels(trust2$ItemCnt))] = i
}

# create indicator showing the number of countries used for each item
item.ind.len.1 = lapply(levels(trust2$Item), function(x) grep(x, levels(trust2$ItemCnt)))
item.ind.len = sapply(item.ind.len.1, length)

# length of each estimated mood series for ragged array models
len_theta_ts = rep(0, n.cntrys)
for(i in 1:n.cntrys){
  len_theta_ts[i] = length(cnt_start_yr[i]:n.yrs)
}
theta_n = sum(len_theta_ts)  

# create r-length cntry vector
cntrys.r = rep(0, theta_n)
pos = 1
for(i in 1:n.cntrys) {
  cntrys.r[pos:(len_theta_ts[i]+pos-1)] = (rep(i, len_theta_ts[i]))
  pos = pos + len_theta_ts[i]
}

# create r-length year vector
year.r = rep(0, theta_n)
pos = 1
for(i in 1:n.cntrys) {
  year.r[pos:(len_theta_ts[i]+pos-1)] = cnt_start_yr[i]:n.yrs
  pos = pos + len_theta_ts[i]
}

# create R-vector mapping R support estimates to N opinions
n.map = data.frame(Obs=1:n.resp, Cntry=cntrys, Yr=yrs)
r.map = data.frame(Est=1:theta_n, Cntry=cntrys.r, Yr=year.r)
n.r.merg = merge(n.map, r.map, by=c("Cntry", "Yr"), all.x=TRUE)
n.r.merg = n.r.merg[order(n.r.merg$Obs),]

# create indicator vector for estimate start positions
est_pos = rep(1, n.cntrys)
est_pos[1] = 1
for(i in 2:n.cntrys) {
  est_pos[i] = est_pos[i-1] + len_theta_ts[i-1]
}

# create indicator vector for estimate end position
est_pos_2 = est_pos
for(i in 1:n.cntrys) {
  est_pos_2[i] = est_pos[i] + len_theta_ts[i] -1
}

# specify data for stan
dat.6.7 = list(N=n.resp, K=n.items, J=n.cntrys, P=n.itm.cnt, R=theta_n, jj=cntrys, pp=itm.cnts, 
               kk=items, rr=n.r.merg$Est, it_len=item.ind.len, est_pos=est_pos, x=trust2$RespN, 
               samp=trust2$Sample, mn_resp_log=mean.resp.log, len_theta_ts=len_theta_ts)

lapply(dat.6.7, function(x) sum(is.na(x)))
lapply(dat.6.7, sd)

# pars
pars.6.7 = c("Sigma","Omega","sigma_delta","sigma_theta","phi","mu_lambda","lambda","gamm","theta","delta","theta_init",
             "x_pred","log_lik")

# iterations
n.iter = 1000
n.warm = 500
n.samp = n.iter - n.warm
n.chn = 4


## Stan fit

stan.mod = cmdstan_model('stan.mod6.7.stan')

fit.mod= stan.mod$sample(
  data = dat.6.7,
  chains = n.chn,
  init = 1,
  thin = 1,
  parallel_chains = n.chn,
  iter_warmup = n.warm,
  iter_sampling = n.samp,
  refresh = round(n.iter/20, 0),
  adapt_delta = 0.9, 
  max_treedepth = 14,
  save_warmup = FALSE
)

# check model fit
res = fit.mod
sum = res$summary(pars.6.7)

diag1 = res$cmdstan_diagnose()
print(sum[order(sum$rhat, decreasing=TRUE), ], n=50)
res_neff_ratio = neff_ratio(res)
res_neff_ratio[order(res_neff_ratio, decreasing=FALSE)][1:50]

res.tab = res$print(pars.6.7, max_rows=80, digits=3)

# save results and model
write.csv(sum, "trust_stan_summary.csv", row.names=FALSE) 
fit.mod$save_object(file = "trust_stan_fit.RDS")


# traceplots
trace.6 = bayesplot::mcmc_trace(res$draws(), size=0.3, n_warmup=0, np=nuts_params(res),
                                pars=c("Sigma[1,1]", "Sigma[2,2]", "Omega[1,2]", "sigma_theta", "sigma_delta",
                                       "phi", "lambda[8]", "gamm[8]", "delta[23]", "theta[432]", "theta[265]"))
cols = viridis(4, begin=0.1, end=0.9, alpha=0.5)
pdf("Fig_S1_trace_trust.pdf", height=8, width=12)
trace.6 + ggplot2::scale_colour_manual(values=cols)
dev.off()

# rhat plots
pdf("Fig_S1_rhat_trust.pdf", height=6, width=5)
par(mfrow=c(1,1), mar=c(2.5, 2.5, 1, 1), cex=1, mgp=c(1.4, 0.3, 0), tcl=-0.2)
hist(sum$rhat, main="", xlab="")
grid()
box()
hist(sum$rhat,add=TRUE)
dev.off()


## Extract theta estimates

theta.out = apply(res$draws("theta"), 3, as.vector)
(theta.mean = mean(as.vector(theta.out)))
(theta.sd = sd(as.vector(theta.out)))
theta.std = (theta.out - theta.mean) / theta.sd # standardize
theta.t = apply(theta.std, 1, function(x) t(x) )
theta.pe = apply(theta.t, 1, mean)
theta.u95 = apply(theta.t, 1, quantile, probs=c(0.975))
theta.l95 = apply(theta.t, 1, quantile, probs=c(0.025))
theta.sd = apply(theta.t, 1, sd)
theta.df = data.frame(Country=cnt.names[r.map$Cntry], Year=r.map$Yr+year0, Trust=theta.pe, 
                      Trust_u95=theta.u95, Trust_l95=theta.l95, Trust_sd=theta.sd)
theta.dim = dim(theta.df)[2]
theta.samp.100 = theta.t[, sample(1:dim(theta.t)[2], 100)]
theta.draws = data.frame(Country=theta.df$Country, Year=theta.df$Year, theta.samp.100)

# N Resps per country-year
nresp = trust2[, c("Country", "Year", "Item", "RespProp")]
nresp$Year = nresp$Year+year0
n_resp = data.frame(table(nresp$Country, nresp$Year))
names(n_resp) = c("Country", "Year", "N_resp")
theta.df = merge(theta.df, n_resp, by=c("Country", "Year"), all.x=TRUE)

# save country-year trust estimates
write.csv(theta.df, "trust_est.csv", row.names=FALSE)


## Item analysis

# examine item pars
item.names = levels(trust2$Item)
item.pars = data.frame(
  item_int = res$summary("lambda")[,2][[1]], 
  item_slope = res$summary("gamm")[,2][[1]])
rownames(item.pars) = item.names
item.pars$Proj = as.character(trust2$Project[match(item.names, trust2$Item)])

# extract 100 draws from posteriors of ints and slopes
lambda.out = apply(res$draws("lambda"), 3, as.vector)
gamma.out = apply(res$draws("gamm"), 3, as.vector)

# adjust intercepts and slopes to account for standardization of theta 
theta.out = apply(res$draws("theta"), 3, as.vector)
(theta.raw.mean = mean(as.vector(theta.out)))
(theta.raw.sd = sd(as.vector(theta.out)))
item.pars.std = item.pars

# intercept (a): a^std = a + b*mu
item.pars.std[,1] = item.pars[,1] + (item.pars[,2] * theta.raw.mean)
lambda.out.std = lambda.out + (gamma.out * theta.raw.mean)

# slope (b): b^std = b*sigma
item.pars.std[,2] = item.pars[,2] * theta.raw.sd
gamma.out.std = gamma.out * theta.raw.sd

# CIs for slopes
apply(gamma.out.std, 2, quantile, probs=c(0.025, 0.975))
item.pars.std[,4:5] = t(apply(gamma.out.std, 2, quantile, probs=c(0.025, 0.975)))
names(item.pars.std)[4:5] = c("slope_l95CI", "slope_u95CI")
item.pars.std # on logit or std normal scale

# set up plot
dim(item.pars.std)

# plot
draw.ind = sample(1:n.samp*n.chn, 100)
png("Fig_S4_trust_int_slopes.png", height=1200, width=1500)
par(mfrow=c(5, 7), mar=c(1, 1, 0.2, 0.2), tcl=-0.15, las=0, cex=1.8)
for(i in 1:dim(item.pars.std)[1]){
  curve(arm::invlogit(item.pars.std[i,1] + x*item.pars.std[i,2]), -3, 3, type="l", 
        xlab="", ylab="", xaxt="n", yaxt="n", yaxs="i", col=rgb(1,1,1,1), ylim=c(0,1))
  abline(h=0.5, lty=3, lwd=0.75, col=rgb(0.5, 0.5, 0.5, 1))
  abline(v=0, lty=3, lwd=0.75, col=rgb(0.5, 0.5, 0.5, 1))
  curve(arm::invlogit(item.pars.std[i,1] + x*item.pars.std[i,2]), -3, 3, type="l", lwd=1.25, 
        col=rgb(0.48, 0.22, 0.55, 1), add=TRUE)
  for(j in 1:100){
    curve(arm::invlogit(lambda.out.std[draw.ind[j],i] + x*gamma.out.std[draw.ind[j],i]), 
          -3, 3, type="l", lwd=0.75, col=rgb(0.48, 0.22, 0.55, 0.1), add=TRUE)
  }
  text(-3, .95, rownames(item.pars.std)[i], adj=0, cex=0.8)
  axis(2, at=c(0, .5, 1), labels=c("0", "0.5", "1"), mgp=c(1.5, .15, 0), cex.axis=0.6)
  axis(1, at=c(-2, 0, 2), mgp=c(1, -0.15, 0), cex.axis=0.6)
}
dev.off()


## Plots of model fit

x.sim = apply(res$draws("x_pred"), 3, as.vector)
x.sim.pe = apply(x.sim, 2, mean, na.rm=TRUE) / trust2$Sample
x.sim.err = trust2$RespProp - x.sim.pe

# plot posterior predicted fit - counts
pdf('Fig_S3a_trust_obsim_count.pdf', width=5, height=5)
par(mfrow=c(1,1), mar=c(2.5,3,.5,.5), tcl=-0.2, las=1)
plot(y=trust2$RespN, x=x.sim.pe*trust2$Sample, type="n", xlab="", ylab="", axes=FALSE, xaxs="i",
     yaxs="i", main="", xlim=c(0, max(trust2$RespN)), ylim=c(0, max(trust2$RespN)))
abline(a=0, b=1, lty=2, col="black")
axis(side=1, tick=TRUE, mgp=c(1,0.15,0), cex.axis=0.8)
axis(side=2, tick=TRUE, mgp=c(1,0.4,0), cex.axis=0.8)
grid()
points(y=trust2$RespN, x=x.sim.pe*trust2$Sample, pch=16, cex=0.6, col=rgb(0,0,0,.3))
lines(lowess(y=trust2$RespN, x=x.sim.pe*trust2$Sample), col=rgb(0,0,0.5,1), lwd=2)
mtext(side=1, line=1.2, "Observed Response counts", cex=1)
mtext(side=2, line=2, "Simulated Response counts", cex=1, las=0)
box()
dev.off()

# plot posterior predicted fit - proportions
pdf('Fig_S3b_trust_obsim_prop.pdf', width=5, height=5)
par(mfrow=c(1,1), mar=c(2.5,3,.5,.5), tcl=-0.2, las=1)
plot(y=trust2$RespN/trust2$Sample, x=x.sim.pe, type="n", xlab="", ylab="", axes=FALSE, xaxs="i",
     yaxs="i", main="", xlim=c(0,1), ylim=c(0,1))
abline(a=0, b=1, lty=2, col="black")
axis(side=1, tick=TRUE, mgp=c(1,0.15,0), cex.axis=0.8)
axis(side=2, tick=TRUE, mgp=c(1,0.4,0), cex.axis=0.8)
grid()
points(y=trust2$RespN/trust2$Sample, x=x.sim.pe, pch=16, cex=0.6, col=rgb(0,0,0,.3))
lines(lowess(y=trust2$RespN/trust2$Sample, x=x.sim.pe), col=rgb(0,0,0.5,1), lwd=2)
mtext(side=1, line=1.2, at=0.5, "Observed Response proportions", cex=1)
mtext(side=2, line=2, at=0.5, "Simulated Response proportions", cex=1, las=0)
box()
dev.off()

# plot posterior predicted fit - response distributions
pdf("Fig_S3c_trust_obsim_distr.pdf", width=5, height=5)
par(mfrow=c(1,1), mar=c(2.5, 2, 1, .5), tcl=-0.2, las=1)
plot(density(trust2$RespN/trust2$Sample), type="n", axes=FALSE, xlim=c(0,1), ylim=c(0,3), xaxs="i",
     yaxs="i", main="", xlab="")
axis(side=1, tick=TRUE, mgp=c(1,0.15,0), cex.axis=0.8)
axis(side=2, tick=TRUE, mgp=c(1,0.4,0), cex.axis=0.8)
grid()
for(i in 1:20){
  lines(density(x.sim[i,]/trust2$Sample), lwd=1, col=rgb(1, 0.55, 0, 0.5))
}
lines(density(trust2$RespN/trust2$Sample), lwd=2.5, col=rgb(0, 0.55, 0.55, 1))
mtext(side=1, line=1.2, at=0.5, "Response proportions", cex=1)
legend("topleft", lwd=c(3, 1), col=c("darkcyan", "darkorange"), c("Observed", "Simulated"), 
       cex=0.9, bty="n")
box()
dev.off()


